In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 

PLOTLY LIBRARIES¶

In [2]:
import plotly.express as px 
import plotly.graph_objects as go 
import plotly.figure_factory as ff
import plotly.subplots as make_subplots
In [3]:
df = pd.read_csv('heart_disease_uci.csv')
In [4]:
df.head()
Out[4]:
id age sex dataset cp trestbps chol fbs restecg thalch exang oldpeak slope ca thal num
0 1 63 Male Cleveland typical angina 145.0 233.0 True lv hypertrophy 150.0 False 2.3 downsloping 0.0 fixed defect 0
1 2 67 Male Cleveland asymptomatic 160.0 286.0 False lv hypertrophy 108.0 True 1.5 flat 3.0 normal 2
2 3 67 Male Cleveland asymptomatic 120.0 229.0 False lv hypertrophy 129.0 True 2.6 flat 2.0 reversable defect 1
3 4 37 Male Cleveland non-anginal 130.0 250.0 False normal 187.0 False 3.5 downsloping 0.0 normal 0
4 5 41 Female Cleveland atypical angina 130.0 204.0 False lv hypertrophy 172.0 False 1.4 upsloping 0.0 normal 0
In [5]:
df.describe()
Out[5]:
id age trestbps chol thalch oldpeak ca num
count 920.000000 920.000000 861.000000 890.000000 865.000000 858.000000 309.000000 920.000000
mean 460.500000 53.510870 132.132404 199.130337 137.545665 0.878788 0.676375 0.995652
std 265.725422 9.424685 19.066070 110.780810 25.926276 1.091226 0.935653 1.142693
min 1.000000 28.000000 0.000000 0.000000 60.000000 -2.600000 0.000000 0.000000
25% 230.750000 47.000000 120.000000 175.000000 120.000000 0.000000 0.000000 0.000000
50% 460.500000 54.000000 130.000000 223.000000 140.000000 0.500000 0.000000 1.000000
75% 690.250000 60.000000 140.000000 268.000000 157.000000 1.500000 1.000000 2.000000
max 920.000000 77.000000 200.000000 603.000000 202.000000 6.200000 3.000000 4.000000
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 920 entries, 0 to 919
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   id        920 non-null    int64  
 1   age       920 non-null    int64  
 2   sex       920 non-null    object 
 3   dataset   920 non-null    object 
 4   cp        920 non-null    object 
 5   trestbps  861 non-null    float64
 6   chol      890 non-null    float64
 7   fbs       830 non-null    object 
 8   restecg   918 non-null    object 
 9   thalch    865 non-null    float64
 10  exang     865 non-null    object 
 11  oldpeak   858 non-null    float64
 12  slope     611 non-null    object 
 13  ca        309 non-null    float64
 14  thal      434 non-null    object 
 15  num       920 non-null    int64  
dtypes: float64(5), int64(3), object(8)
memory usage: 115.1+ KB
In [7]:
df.isna().sum()
Out[7]:
id            0
age           0
sex           0
dataset       0
cp            0
trestbps     59
chol         30
fbs          90
restecg       2
thalch       55
exang        55
oldpeak      62
slope       309
ca          611
thal        486
num           0
dtype: int64
In [8]:
df.dropna(inplace = True)
In [9]:
df.isna().sum()
Out[9]:
id          0
age         0
sex         0
dataset     0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalch      0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
num         0
dtype: int64
In [10]:
df.head(2)
Out[10]:
id age sex dataset cp trestbps chol fbs restecg thalch exang oldpeak slope ca thal num
0 1 63 Male Cleveland typical angina 145.0 233.0 True lv hypertrophy 150.0 False 2.3 downsloping 0.0 fixed defect 0
1 2 67 Male Cleveland asymptomatic 160.0 286.0 False lv hypertrophy 108.0 True 1.5 flat 3.0 normal 2

1️. Age vs Cholesterol for Top 4 Chest Pain Types¶

In [13]:
top_leagues = df['cp'].value_counts().nlargest().index
display(top_leagues)

plt.figure(figsize=(13,6))
ax = sns.scatterplot(x='age',y='chol',data = df[df['cp'].isin(top_leagues)],hue='cp')
plt.xlabel("Age")
plt.ylabel("Cholestrol")
plt.title("Age vs Cholestrol for Top 4 Chest pain")
plt.legend(title = "Chest Pain Type",bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_facecolor("#ffffcc")
plt.show()
Index(['asymptomatic', 'non-anginal', 'atypical angina', 'typical angina'], dtype='object', name='cp')
No description has been provided for this image

2. Cholesterol vs Age (Colored by Chest Pain Type)¶

In [12]:
fig = px.scatter(data_frame = df,
x="age",
y="chol",
color="cp",
size='ca',
hover_data=['oldpeak'])
fig.update_layout(title_text="<b> Cholesterol Vs Age <b>",
titlefont={'size': 24, 'family':'Serif'},
width=1000,
height=500,
)  
fig.show()

3. Scatter Plot of Cholesterol vs Age (Colored by Sex)¶

In [16]:
import plotly.express as px 

fig = px.scatter(df , x='chol' , y='age' , color='sex')
fig.update_layout(width = 1000 , height = 500)
fig.update_layout(title_text = 'Scatter plot of Cholestrol vs Age (colored by Sex)')
fig.show()

4. Scatter Plot of Cholesterol vs Age (Faceted by Chest Pain Type, with Size for Oldpeak)¶

In [17]:
import plotly.express as px

fig = px.scatter(
    df, 
    x='chol', 
    y='age', 
    color='cp', 
    size='oldpeak', 
    size_max=30, 
    hover_name='exang', 
    facet_col='cp'
)

fig.update_layout(width=1000, height=500)
fig.update_layout(title_text='Scatter Plot of Cholesterol vs. Age (colored by cp)')


fig.show()

5️. Chest Pain Type vs Slope (Bar Chart)¶

In [18]:
def generate_rating_df(df):
    rating_df = df.groupby(['cp', 'slope']).agg({'id': 'count'}).reset_index()
    rating_df = rating_df[rating_df['id'] != 0]
    rating_df.columns = ['cp', 'slope', 'counts']
    rating_df = rating_df.sort_values('slope')
    return rating_df
    
rating_df = generate_rating_df(df)
fig = px.bar(rating_df, x='cp', y='counts', color='slope')
fig.update_traces(textposition='auto',
                  textfont_size = 20)
fig.update_layout(width=500 , height=500)
fig.update_layout(barmode='stack')

6️. Chest Pain vs Gender¶

In [19]:
fig = px.scatter(data_frame = df , 
                 x='age', 
                 y='chol',
                 size='ca',
                 size_max=30,
                 color='sex',
                 trendline='ols',
                 trendline_scope='overall',
                 trendline_color_override='black')

fig.update_layout(title_text="<b>Chest Pain vs Gender<b>",
                  titlefont={'size':24 , 'family':'Serif'},
                  width=1000,
                  height=500,
                 )

fig.show()

7️. Distribution of Age¶

In [20]:
fig = px.histogram(df,x='age',height=500 , width = 900 , template = 'simple_white',
                   color = 'sex',
                   color_discrete_sequence = ['salmon','lightblue'])

fig.update_layout(title={'text':'Histogram of Persons by Age','font':{'size':25}}
,title_font_family="Times New Roman",
title_font_color="darkgrey",
title_x=0.2)
fig.update_layout(
font_family='classic-roman',
font_color= 'grey',
yaxis_title={'text': " count", 'font': {'size':18}},
xaxis_title={'text': " Age", 'font': {'size':18}}
)
fig.show()

8️. Distribution of Cholesterol Levels by Chest Pain Type¶

In [21]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

asymptomatic = df[df['cp'] == 'asymptomatic']
non_anginal = df[df['cp'] == 'non-anginal']
atypical_angina = df[df['cp'] == 'atypical angina']
typical_angina = df[df['cp'] == 'typical angina']

fig = make_subplots(rows=2, cols=2,
                    specs=[[{'type': 'domain'}, {'type': 'domain'}],
                           [{'type': 'domain'}, {'type': 'domain'}]],
                    subplot_titles=("Asymptomatic", "Non-Anginal",
                                    "Atypical Angina", "Typical Angina"))

fig.add_trace(go.Pie(labels=asymptomatic["thal"], values=asymptomatic["chol"], name="asymptomatic"), 1, 1)
fig.add_trace(go.Pie(labels=non_anginal["thal"], values=non_anginal["chol"], name="non_anginal"), 1, 2)
fig.add_trace(go.Pie(labels=atypical_angina["thal"], values=atypical_angina["chol"], name="atypical_angina"), 2, 1)  # Fixed
fig.add_trace(go.Pie(labels=typical_angina["thal"], values=typical_angina["chol"], name="typical_angina"), 2, 2)

fig.update_layout(
    height=800,
    width=1000,
    title_text="Distribution of Cholesterol Levels by Chest Pain Type",
    title_font_size=24
)
fig.update_traces(textposition='inside', textfont_size=16)
fig.update_annotations(font_size=20)

fig.show()

9. Cholesterol Variation with Age¶

In [22]:
fig = px.bar(df , x='age' , y='chol' , hover_data = ['oldpeak'] , color='sex' , height=400)
fig.show()

10. Correlation HeatMap¶

In [23]:
import plotly.figure_factory as ff


def format_title(title, subtitle=None, subtitle_font="Arial", subtitle_font_size=12):
    title = f'<b>{title}</b>'
    if not subtitle:
        return title
    subtitle = f'<span style="font-family: {subtitle_font}; font-size: {subtitle_font_size}px;">{subtitle}</span>'
    return f'{title}<br>{subtitle}'

_ = df.groupby(['cp', 'thal']).chol.size().unstack().fillna(0)  # Fill NaN values with 0
z = _.values.tolist()
x = _.columns.tolist()
y = _.index.tolist()

fig = ff.create_annotated_heatmap(
    z=z,
    x=x,
    y=y,
    xgap=3,
    ygap=3,
    colorscale=[[0, '#53354A'], [1, '#E84545']],  # Defining scale explicitly
)

title = format_title('CP vs Thal', 'Chol Levels', 'Arial', 12)

fig.update_layout(
    width = 800,
    height = 400,
    title_text=title,
    title_x=0.5,
    titlefont={'size': 24, 'family': 'Proxima Nova'},
    template='plotly_dark',
    paper_bgcolor='#2B2E4A',
    plot_bgcolor='#2B2E4A',
    xaxis={'side': 'bottom'},
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange='reversed'
)

fig.show()

11. Age Histogram¶

In [24]:
plt.figure(figsize = (8,5))
sns.kdeplot(df.age, shade = True, color = "r")
plt.title("Age Histogram", fontsize = 20)
plt.show()
print("Histogram's skewness is {} and kurtosis is {}".format(df.age.skew(), df.age.kurtosis()))
C:\Users\a3388\AppData\Local\Temp\ipykernel_8408\942704215.py:2: FutureWarning:



`shade` is now deprecated in favor of `fill`; setting `fill=True`.
This will become an error in seaborn v0.14.0; please update your code.


No description has been provided for this image
Histogram's skewness is -0.21485314045391055 and kurtosis is -0.5174882052116159

12. Chest Pain According to Gender¶

In [25]:
template = ['ggplot2','plotly_dark', 'seaborn', 'simple_white', 'plotly']
fig = px.histogram(df,
x="cp",
y=None,
color="sex",
width=1200,
height=450,
histnorm='percent',
color_discrete_map={
"male": "RebeccaPurple", "female": "lightsalmon"
},
template="plotly_dark"
)
fig.update_layout(title="Gender Chest Pain",
font_family="San Serif",
bargap=0.2,
barmode='group',
titlefont={'size': 24},
legend=dict(
orientation="v", y=1, yanchor="top", x=1.25, xanchor="right")
)
fig.show()

13. Exploring Relationships Between Age, Cholesterol, and Heart Rate by Chest Pain Type¶

In [26]:
sns.pairplot(df[['cp','age','chol','thalch']], hue='cp', aspect=1.5,dropna=True,palette='bright')
plt.show()
No description has been provided for this image

14. Cholesterol Trends by Age, Sex, and Chest Pain Type¶

In [27]:
import seaborn as sns
import matplotlib.pyplot as plt

heart_df_fg = sns.FacetGrid(
    data=df, 
    col="sex", 
    hue="sex", 
    row="cp", 
    height=4, 
    aspect=1.3, 
    palette='Dark2', 
    col_order=["Male", "Female"]
    
)              
heart_df_fg.map_dataframe(sns.regplot, "age", "chol")
# Use dark background
plt.style.use('default')
plt.show()
No description has been provided for this image

15. Sex-Wise Distribution of Key Heart Disease Indicators¶

In [28]:
plt.figure(figsize=(20,8))

for i,col in enumerate(['age','chol','oldpeak'],1):
    plt.subplot(1,3,i)
    ax = sns.barplot(x='sex' , y=col , data = df,color='gold')
    plt.title(f'{col} Comparison')
    plt.ylabel(col if i==1 else '')

    # Adding count values above each bar
    for i in range(len(ax.containers)):
        ax.bar_label(ax.containers[i] , label_type = 'edge')

plt.show()
No description has been provided for this image

Conclusion :¶

1️⃣ Age and Gender Influence on Heart Disease:¶

- Older individuals and males generally have higher cholesterol and blood pressure, making them more prone to heart disease.¶

- Males also show a higher prevalence of exercise-induced angina compared to females.¶

2️⃣ Chest Pain Type and Heart Disease:¶

- Asymptomatic and atypical angina are the most common chest pain types among patients diagnosed with heart disease.¶

- Typical angina is less associated with heart disease, suggesting it might not always indicate a severe condition.¶

3️⃣ Cholesterol and Max Heart Rate Relationship:¶

- Patients with high cholesterol tend to have lower max heart rates, indicating potential cardiovascular inefficiency.¶

- Abnormal ECG results are more frequent in individuals with high cholesterol and chest pain.¶

4️⃣ Exercise-Induced Angina as a Risk Factor:¶

- Many patients with heart disease experience exercise-induced angina, reinforcing its importance as a symptom.¶

- This suggests that stress tests can be useful in identifying high-risk individuals.¶

5️⃣ Dataset Balance and Key Predictors:¶

- The dataset contains a mix of heart disease and non-heart disease patients, allowing for meaningful comparisons.¶

- Features like cholesterol, max heart rate, chest pain type, ECG results, and exercise-induced angina stand out as strong indicators of heart disease.¶